{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "\n",
    "from src.byzerllm.sensevoice import init_model, format_str_v3\n",
    "import base64\n",
    "import json\n",
    "\n",
    "# Location of the SenseVoiceSmall checkpoint. Set the SENSEVOICE_MODEL_DIR\n",
    "# environment variable to point elsewhere; the default is the original\n",
    "# hard-coded location, so behaviour is unchanged when it is unset.\n",
    "model_dir = os.environ.get(\n",
    "    \"SENSEVOICE_MODEL_DIR\", \"/Users/allwefantasy/models/SenseVoiceSmall\"\n",
    ")\n",
    "# Sample clip under the checkpoint directory, derived from model_dir so the\n",
    "# path is only configured in one place.\n",
    "audio_path = os.path.join(model_dir, \"example\", \"zh.mp3\")\n",
    "\n",
    "# init_model returns a pair; only the first element (the model) is used here.\n",
    "model, _ = init_model(model_dir)\n",
    "\n",
    "# Prepare test audio: read the raw bytes, then close the file before doing\n",
    "# anything else so no handle stays open while the model runs.\n",
    "with open(audio_path, \"rb\") as audio_file:\n",
    "    audio_data = audio_file.read()\n",
    "base64_audio = base64.b64encode(audio_data).decode(\"utf-8\")\n",
    "\n",
    "# Prepare input in the expected format: a JSON string carrying the audio as\n",
    "# a base64 data URI.\n",
    "# NOTE(review): the clip is an .mp3 but the URI declares audio/wav; kept as in\n",
    "# the original - confirm how stream_chat interprets the declared format.\n",
    "test_input = json.dumps(\n",
    "    {\n",
    "        \"type\": \"audio\",\n",
    "        \"audio\": f\"data:audio/wav;base64,{base64_audio}\",\n",
    "    }\n",
    ")\n",
    "\n",
    "# Call stream_chat\n",
    "result = model.stream_chat(tokenizer=None, ins=test_input)\n",
    "\n",
    "# Process and print the result: the first entry is unpacked as an\n",
    "# (output, metadata) pair, where output is a JSON string.\n",
    "if result and len(result) > 0:\n",
    "    output, metadata = result[0]\n",
    "    parsed_output = json.loads(output)\n",
    "    formatted_text = parsed_output.get(\"text\", \"\")\n",
    "    print(\"Transcription:\", formatted_text)\n",
    "    print(\"Metadata:\", metadata)\n",
    "else:\n",
    "    print(\"No result returned from the model.\")\n"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
